{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2361f163-62bb-408e-9960-a62265414968",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, DataCollatorForSeq2Seq\n",
    "from peft import LoraConfig, get_peft_model\n",
    "from trl import SFTTrainer\n",
    "import pandas as pd\n",
    "from datasets import Dataset\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "42b279eb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "659ddcd76f8c4cf1b34639d968bcf190",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e9403f003e344d8ea7113bfa6dec91ed",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/3729 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/lib/python3.12/site-packages/trl/trainer/sft_trainer.py:300: UserWarning: You passed a processing_class with `padding_side` not equal to `right` to the SFTTrainer. This might lead to some unexpected behaviour due to overflow issues when training a model in half-precision. You might consider adding `processing_class.padding_side = 'right'` to your code.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# 定义模型名称\n",
    "model_name = \"/root/autodl-tmp/deepseek-r1-distill-llama-8b\"  # 模型文件夹路径\n",
    "\n",
    "# Tokenizer\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True)\n",
    "tokenizer.pad_token = tokenizer.eos_token\n",
    "tokenizer.pad_token_id = tokenizer.eos_token_id\n",
    "tokenizer.padding_side = 'right'  # 确保 padding_side 为 'right'\n",
    "\n",
    "# 加载模型\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_name,\n",
    "    device_map={\"\": 0},  # 将模型加载到第一个 GPU\n",
    "    trust_remote_code=True  # 确保加载自定义代码\n",
    ")\n",
    "\n",
    "# LoRA配置\n",
    "lora_config = LoraConfig(\n",
    "    task_type=\"CAUSAL_LM\",  # 微调模型为自回归模型\n",
    "    r=16,  # LoRA 低秩分解的秩\n",
    "    lora_alpha=32,  # LoRA 缩放因子\n",
    "    target_modules=[\"q_proj\", \"v_proj\"],  # 目标模块，根据LLaMA3模型结构指定\n",
    "    lora_dropout=0.05,  # Dropout 概率\n",
    "    bias=\"none\",  # 不训练 bias\n",
    "    init_lora_weights=True,  # 初始化 LoRA 层权重\n",
    "    inference_mode=False  # 允许训练\n",
    ")\n",
    "\n",
    "# 将LoRA配置应用到模型\n",
    "model = get_peft_model(model, lora_config)\n",
    "\n",
    "# 定义训练参数\n",
    "training_arguments = TrainingArguments(\n",
    "    output_dir=\"./Llama3_8b_LoRA_2025\",\n",
    "    eval_strategy=\"no\",  # 禁用评估\n",
    "    optim=\"paged_adamw_8bit\",\n",
    "    per_device_train_batch_size=2,\n",
    "    gradient_accumulation_steps=4,\n",
    "    per_device_eval_batch_size=8,\n",
    "    log_level=\"debug\",\n",
    "    save_strategy=\"epoch\",\n",
    "    logging_steps=80,\n",
    "    learning_rate=1e-4,\n",
    "    fp16=False,  # 根据硬件支持选择\n",
    "    bf16=False,  # 根据硬件支持选择\n",
    "    num_train_epochs=6,\n",
    "    warmup_ratio=0.1,\n",
    "    lr_scheduler_type=\"linear\",\n",
    ")\n",
    "\n",
    "# 数据预处理\n",
    "def process_func(example):\n",
    "    MAX_LENGTH = 384\n",
    "    input_ids, attention_mask, labels = [], [], []\n",
    "    instruction = tokenizer(f\"User: {example['instruction']} {example['input']}\\n\\n\", add_special_tokens=False)\n",
    "    response = tokenizer(f\"Assistant: {example['output']}{tokenizer.eos_token}\", add_special_tokens=False)\n",
    "    input_ids = instruction[\"input_ids\"] + response[\"input_ids\"]\n",
    "    attention_mask = instruction[\"attention_mask\"] + response[\"attention_mask\"]\n",
    "    labels = [-100] * len(instruction[\"input_ids\"]) + response[\"input_ids\"]\n",
    "    if len(input_ids) > MAX_LENGTH:  # 截断\n",
    "        input_ids = input_ids[:MAX_LENGTH]\n",
    "        attention_mask = attention_mask[:MAX_LENGTH]\n",
    "        labels = labels[:MAX_LENGTH]\n",
    "    return {\n",
    "        \"input_ids\": input_ids,\n",
    "        \"attention_mask\": attention_mask,\n",
    "        \"labels\": labels\n",
    "    }\n",
    "\n",
    "# 加载数据\n",
    "df = pd.read_json('./huanhuan.json')\n",
    "ds = Dataset.from_pandas(df)\n",
    "tokenized_id = ds.map(process_func, remove_columns=ds.column_names)\n",
    "\n",
    "# 创建训练器\n",
    "trainer = SFTTrainer(\n",
    "    model=model,\n",
    "    args=training_arguments,\n",
    "    train_dataset=tokenized_id,\n",
    "    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c51a838f-44cd-4fee-aa7b-2509c2dfefb3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'instruction': ['小姐，别的秀女都在求中选，唯有咱们小姐想被撂牌子，菩萨一定记得真真儿的——',\n",
       "  '这个温太医啊，也是古怪，谁不知太医不得皇命不能为皇族以外的人请脉诊病，他倒好，十天半月便往咱们府里跑。',\n",
       "  '嬛妹妹，刚刚我去府上请脉，听甄伯母说你来这里进香了。',\n",
       "  '嬛妹妹，我虽是一介御医，俸禄微薄，可是我保证会一生一世对你好，疼爱你，保护你，永远事事以你为重。本来没半月一次到府上去请脉，能够偶尔见一次妹妹的笑靥，已经心满意足了，可谁知——而且我也知道，妹妹心里是不愿意去殿选的。',\n",
       "  '实初虽然唐突了妹妹，却是真心实意地希望妹妹不要去应选，这不仅仅是因为我心里一直把妹妹当成……其实更是因为甄伯父曾经救过家父的性命。',\n",
       "  '可是我父亲当年被诬，起因也是因为后宫争斗，不能独善其身。一介御医尚且如此，何况妹妹如果被选中的话，会身在其中啊。',\n",
       "  '在京里休息了这些日子，早已经调养过来了。',\n",
       "  '是啊。可是我总还想着我们一起长大的情分呢。诶？妹妹今日打扮得好生素净，可是细看起来还是个美人坯子，怎么看都是好的。',\n",
       "  '你是谁？',\n",
       "  '大理寺少卿，也不是什么高官。'],\n",
       " 'input': ['', '', '', '', '', '', '', '', '', ''],\n",
       " 'output': ['嘘——都说许愿说破是不灵的。',\n",
       "  '你们俩话太多了，我该和温太医要一剂药，好好治治你们。',\n",
       "  '出来走走，也是散心。',\n",
       "  '实初哥哥这么说，就枉顾我们一直以来的兄妹情谊了，嬛儿没有哥哥，一直把你当作自己的亲哥哥一样看待，自然相信哥哥会待妹妹好的——自然了，以后有了嫂子，你也会对嫂子更好。',\n",
       "  '我们两家是世交，昔年恩义不过是父亲随手之劳，不必挂怀。',\n",
       "  '实初哥哥的话我都明白，只是我不去应选，迟早也是玉娆，家中无子，女儿还能不孝吗？',\n",
       "  '如今你住在自己京城的宅子里，不比从前住在外祖家，一墙之隔，见面也方便。',\n",
       "  '沈大美人差矣，姐姐出落得这么标致，皇上见过必定会念念不忘。',\n",
       "  '家父是大理寺少卿甄远道。',\n",
       "  '凡事不论官位高低，只论个理字。']}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ds[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b96566f9-f862-47ae-8c1c-9d570068507f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Answer before training:\n",
      "大理寺少卿，也不是什么高官。是不是有些误解？\n",
      "\n",
      "大理寺少卿，听起来像是小说里的角色，或者是某个地方的别称。可是用户说他不是什么高官，这让我有点困惑。可能是因为“少卿”这个词让我联想到“少卿”在古代指的是高官之子的意思，所以误以为大理寺少卿是高官。但实际上，可能在现代或者某些特定语境下，“少卿”指的是普通人，或者是一个虚构的角色名\n"
     ]
    }
   ],
   "source": [
    "# 使用训练前的模型回答问题\n",
    "def generate_response(model, tokenizer, prompt, max_length=128):\n",
    "    inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
    "    outputs = model.generate(**inputs, max_length=max_length)\n",
    "    response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
    "    return response\n",
    "\n",
    "# 示例问题\n",
    "# question = \"What is the capital of France?\"\n",
    "question = \"大理寺少卿，也不是什么高官。\"\n",
    "print(\"Answer before training:\")\n",
    "print(generate_response(model, tokenizer, question))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "76e68a3a-2319-48c2-82e9-ec60fec06d3a",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Currently training with a batch size of: 2\n",
      "***** Running training *****\n",
      "  Num examples = 3,729\n",
      "  Num Epochs = 1\n",
      "  Instantaneous batch size per device = 2\n",
      "  Total train batch size (w. parallel, distributed & accumulation) = 8\n",
      "  Gradient Accumulation steps = 4\n",
      "  Total optimization steps = 466\n",
      "  Number of trainable parameters = 6,815,744\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='466' max='466' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [466/466 06:15, Epoch 0/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>80</td>\n",
       "      <td>3.681300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>160</td>\n",
       "      <td>3.012200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>240</td>\n",
       "      <td>2.959800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>320</td>\n",
       "      <td>2.902400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>400</td>\n",
       "      <td>2.863600</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Saving model checkpoint to ./Llama3_8b_LoRA_2025/checkpoint-466\n",
      "loading configuration file /root/autodl-tmp/deepseek-r1-distill-llama-8b/config.json\n",
      "Model config LlamaConfig {\n",
      "  \"architectures\": [\n",
      "    \"LlamaForCausalLM\"\n",
      "  ],\n",
      "  \"attention_bias\": false,\n",
      "  \"attention_dropout\": 0.0,\n",
      "  \"bos_token_id\": 128000,\n",
      "  \"eos_token_id\": 128001,\n",
      "  \"head_dim\": 128,\n",
      "  \"hidden_act\": \"silu\",\n",
      "  \"hidden_size\": 4096,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 14336,\n",
      "  \"max_position_embeddings\": 131072,\n",
      "  \"mlp_bias\": false,\n",
      "  \"model_type\": \"llama\",\n",
      "  \"num_attention_heads\": 32,\n",
      "  \"num_hidden_layers\": 32,\n",
      "  \"num_key_value_heads\": 8,\n",
      "  \"pretraining_tp\": 1,\n",
      "  \"rms_norm_eps\": 1e-05,\n",
      "  \"rope_scaling\": {\n",
      "    \"factor\": 8.0,\n",
      "    \"high_freq_factor\": 4.0,\n",
      "    \"low_freq_factor\": 1.0,\n",
      "    \"original_max_position_embeddings\": 8192,\n",
      "    \"rope_type\": \"llama3\"\n",
      "  },\n",
      "  \"rope_theta\": 500000.0,\n",
      "  \"tie_word_embeddings\": false,\n",
      "  \"torch_dtype\": \"bfloat16\",\n",
      "  \"transformers_version\": \"4.46.0\",\n",
      "  \"use_cache\": true,\n",
      "  \"vocab_size\": 128256\n",
      "}\n",
      "\n",
      "tokenizer config file saved in ./Llama3_8b_LoRA_2025/checkpoint-466/tokenizer_config.json\n",
      "Special tokens file saved in ./Llama3_8b_LoRA_2025/checkpoint-466/special_tokens_map.json\n",
      "Saving model checkpoint to ./Llama3_8b_LoRA_2025/checkpoint-466\n",
      "loading configuration file /root/autodl-tmp/deepseek-r1-distill-llama-8b/config.json\n",
      "Model config LlamaConfig {\n",
      "  \"architectures\": [\n",
      "    \"LlamaForCausalLM\"\n",
      "  ],\n",
      "  \"attention_bias\": false,\n",
      "  \"attention_dropout\": 0.0,\n",
      "  \"bos_token_id\": 128000,\n",
      "  \"eos_token_id\": 128001,\n",
      "  \"head_dim\": 128,\n",
      "  \"hidden_act\": \"silu\",\n",
      "  \"hidden_size\": 4096,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 14336,\n",
      "  \"max_position_embeddings\": 131072,\n",
      "  \"mlp_bias\": false,\n",
      "  \"model_type\": \"llama\",\n",
      "  \"num_attention_heads\": 32,\n",
      "  \"num_hidden_layers\": 32,\n",
      "  \"num_key_value_heads\": 8,\n",
      "  \"pretraining_tp\": 1,\n",
      "  \"rms_norm_eps\": 1e-05,\n",
      "  \"rope_scaling\": {\n",
      "    \"factor\": 8.0,\n",
      "    \"high_freq_factor\": 4.0,\n",
      "    \"low_freq_factor\": 1.0,\n",
      "    \"original_max_position_embeddings\": 8192,\n",
      "    \"rope_type\": \"llama3\"\n",
      "  },\n",
      "  \"rope_theta\": 500000.0,\n",
      "  \"tie_word_embeddings\": false,\n",
      "  \"torch_dtype\": \"bfloat16\",\n",
      "  \"transformers_version\": \"4.46.0\",\n",
      "  \"use_cache\": true,\n",
      "  \"vocab_size\": 128256\n",
      "}\n",
      "\n",
      "tokenizer config file saved in ./Llama3_8b_LoRA_2025/checkpoint-466/tokenizer_config.json\n",
      "Special tokens file saved in ./Llama3_8b_LoRA_2025/checkpoint-466/special_tokens_map.json\n",
      "\n",
      "\n",
      "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "TrainOutput(global_step=466, training_loss=3.047603721782373, metrics={'train_runtime': 376.0106, 'train_samples_per_second': 9.917, 'train_steps_per_second': 1.239, 'total_flos': 1.3064298743660544e+16, 'train_loss': 3.047603721782373, 'epoch': 0.9994638069705094})"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 开始训练\n",
    "trainer.train()\n",
    "# # 保存模型\n",
    "trainer.save_model(\"./Llama3_8b_LoRA\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "47de516f-5f96-4577-8543-477b1f6eaaa2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Answer after training:\n",
      "大理寺少卿，也不是什么高官。只是一个小小的寺庙里，一个小小的少卿。\n"
     ]
    }
   ],
   "source": [
    "# 使用训练后的模型回答问题\n",
    "def generate_response(model, tokenizer, prompt, max_length=128):\n",
    "    inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
    "    outputs = model.generate(**inputs, max_length=max_length, eos_token_id=tokenizer.eos_token_id)\n",
    "    response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
    "    return response\n",
    "\n",
    "question = \"大理寺少卿，也不是什么高官。\"\n",
    "print(\"Answer after training:\")\n",
    "print(generate_response(model, tokenizer, question))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5c60c50-e242-43d1-b268-cac65d0e3ea6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
